Biblioteka seaborn (sns)

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [4]:
# 100 evenly spaced sample points on [0, 20]; shared x-axis of the demo curves.
x = np.linspace(start=0, stop=20, num=100)
In [5]:
# Draw the 100x4 standard-normal noise matrix from a seeded generator so that
# Restart & Run All reproduces exactly the same curves every time.
rng = np.random.default_rng(42)
y = rng.standard_normal((100, 4))
In [6]:
# Running sum down the rows turns each noise column into a random walk.
# NOTE: this rebinds `y`, so running the cell twice accumulates twice.
y = y.cumsum(axis=0)
In [7]:
# Baseline: the four walks drawn with matplotlib's current (default) style.
plt.plot(x, y)
Out[7]:
[<matplotlib.lines.Line2D at 0x29ee55ac340>,
 <matplotlib.lines.Line2D at 0x29ee55ac3a0>,
 <matplotlib.lines.Line2D at 0x29ee55ac4c0>,
 <matplotlib.lines.Line2D at 0x29ee55ac5e0>]
In [8]:
# Activate seaborn's default theme for all subsequent matplotlib figures.
# Style parameters (style, palette, font_scale, ...) can be passed here.
# `set_theme` is the preferred spelling; `sns.set` is only a legacy alias.
sns.set_theme()
In [9]:
# Same plot call as before, now rendered with the seaborn theme applied.
plt.plot(x, y)
Out[9]:
[<matplotlib.lines.Line2D at 0x29ee76bb3d0>,
 <matplotlib.lines.Line2D at 0x29ee76bb430>,
 <matplotlib.lines.Line2D at 0x29ee76bb550>,
 <matplotlib.lines.Line2D at 0x29ee76bb670>]
In [10]:
# Load seaborn's "tips" example dataset and preview the first rows.
df = sns.load_dataset("tips")
df.head()
Out[10]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()
Out[11]:
total_bill tip size
count 244.000000 244.000000 244.000000
mean 19.785943 2.998279 2.569672
std 8.902412 1.383638 0.951100
min 3.070000 1.000000 1.000000
25% 13.347500 2.000000 2.000000
50% 17.795000 2.900000 2.000000
75% 24.127500 3.562500 3.000000
max 50.810000 10.000000 6.000000
In [12]:
# Same summary restricted to the categorical columns: count, unique, top, freq.
df.describe(include=["category"])
Out[12]:
sex smoker day time
count 244 244 244 244
unique 2 2 4 2
top Male No Sat Dinner
freq 157 151 87 176
In [13]:
# Relational plot of tip against total bill.
sns.relplot(data=df, x="total_bill", y="tip")
Out[13]:
<seaborn.axisgrid.FacetGrid at 0x29ee55e91b0>
In [14]:
# The same relationship drawn through the pandas plotting API, for comparison.
df.plot.scatter(x="total_bill", y="tip")
*c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*.  Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points.
Out[14]:
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
In [15]:
# Re-apply the seaborn theme with fonts scaled up by 20 %.
# `set_theme` is the preferred spelling; `sns.set` is only a legacy alias.
sns.set_theme(font_scale=1.2)
In [16]:
# Colour the points by weekday.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="day",
)
Out[16]:
<seaborn.axisgrid.FacetGrid at 0x29ee7873310>
In [17]:
# Same plot, with the "viridis" palette for the weekday colours.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="day",
    palette="viridis",
)
Out[17]:
<seaborn.axisgrid.FacetGrid at 0x29ee78fc430>
In [18]:
# Split the figure into one column of panels per value of `time`.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="day",
    palette="viridis",
    col="time",
)
Out[18]:
<seaborn.axisgrid.FacetGrid at 0x29ee77d4f70>
In [19]:
# Full facet grid: columns split by `time`, rows split by `smoker`.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="day",
    palette="viridis",
    col="time",
    row="smoker",
)
Out[19]:
<seaborn.axisgrid.FacetGrid at 0x29ee7a9c8b0>
In [20]:
# Switch to the "fmri" example dataset and preview the first five rows.
df = sns.load_dataset("fmri")
df.head()
Out[20]:
subject timepoint event region signal
0 s13 18 stim parietal -0.017552
1 s5 14 stim parietal -0.080883
2 s12 18 stim parietal -0.081033
3 s11 18 stim parietal -0.046134
4 s10 18 stim parietal -0.037970
In [21]:
# Raw signal values against timepoint.
sns.relplot(data=df, x="timepoint", y="signal")
Out[21]:
<seaborn.axisgrid.FacetGrid at 0x29ee7beefb0>
In [22]:
# Line plot of the signal over time without the uncertainty band.
# `errorbar=None` replaces the `ci=None` spelling, which seaborn 0.12
# deprecated (requires seaborn >= 0.12).
sns.relplot(data=df, x='timepoint', y='signal', kind='line', errorbar=None)
Out[22]:
<seaborn.axisgrid.FacetGrid at 0x29ee7bef160>
In [23]:
# Number of observations recorded at each timepoint.
df["timepoint"].value_counts()
Out[23]:
18    56
10    56
4     56
5     56
6     56
2     56
8     56
7     56
3     56
11    56
14    56
12    56
13    56
0     56
15    56
16    56
9     56
17    56
1     56
Name: timepoint, dtype: int64
In [24]:
# Mean of the numeric columns per timepoint, plotted with pandas.
# `numeric_only=True` keeps the string columns (subject, event, region) out
# of the aggregation; pandas >= 2.0 raises a TypeError instead of silently
# dropping them as older versions did.
df.groupby('timepoint').mean(numeric_only=True).plot()
Out[24]:
<AxesSubplot:xlabel='timepoint'>
In [25]:
# One panel per subject (wrapped after four columns); `event` drives both
# the colour and the marker style.
sns.relplot(
    data=df,
    x="timepoint",
    y="signal",
    hue="event",
    style="event",
    col="subject",
    col_wrap=4,
)
Out[25]:
<seaborn.axisgrid.FacetGrid at 0x29ee8fff8e0>
In [26]:
# Back to the "tips" dataset for the categorical plots.
df = sns.load_dataset("tips")
df.head()
Out[26]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [27]:
# Switch to the 'ticks' style with the 'pastel' palette, then draw the
# total bill per weekday as a categorical plot.
# `set_theme` is the preferred spelling; `sns.set` is only a legacy alias.
sns.set_theme(style='ticks', palette='pastel')
sns.catplot(data=df, x='day', y='total_bill')
Out[27]:
<seaborn.axisgrid.FacetGrid at 0x29ee8fff550>
In [28]:
# Same data summarised as one box plot per weekday.
sns.catplot(data=df, x="day", y="total_bill", kind="box")
Out[28]:
<seaborn.axisgrid.FacetGrid at 0x29eea078370>
In [29]:
# Titanic data: bar plot of `survived` by sex, grouped by passenger class.
df = sns.load_dataset("titanic")
sns.catplot(
    data=df,
    x="sex",
    y="survived",
    hue="class",
    kind="bar",
)
Out[29]:
<seaborn.axisgrid.FacetGrid at 0x29eea9444f0>
In [30]:
# Count of passengers per deck, decks along the x-axis.
sns.catplot(data=df, x="deck", kind="count")
Out[30]:
<seaborn.axisgrid.FacetGrid at 0x29eea9442e0>
In [31]:
# Same counts with the decks along the y-axis.
sns.catplot(data=df, y="deck", kind="count")
Out[31]:
<seaborn.axisgrid.FacetGrid at 0x29eea11f340>
In [32]:
# Deck counts along the y-axis again, coloured with the "Blues" palette.
sns.catplot(
    data=df,
    y="deck",
    kind="count",
    palette="Blues",
)
Out[32]:
<seaborn.axisgrid.FacetGrid at 0x29eea9855d0>
In [33]:
# 200 draws from a correlated bivariate normal (means 0 and 1, covariance
# -0.4). A seeded generator makes the sample reproducible, and the raw array
# gets its own name so `df` only ever refers to a DataFrame.
rng = np.random.default_rng(42)
samples = rng.multivariate_normal(mean=[0, 1], cov=[[1, -0.4], [-0.4, 1]], size=200)
df = pd.DataFrame(samples, columns=['x', 'y'])
In [34]:
# Summary statistics of the sampled x and y columns.
df.describe()
Out[34]:
x y
count 200.000000 200.000000
mean -0.052478 0.976274
std 1.004343 1.074738
min -2.882450 -2.513433
25% -0.700322 0.335126
50% -0.018109 1.010378
75% 0.663557 1.706312
max 2.870270 3.488807
In [35]:
# Distribution plot: joint view of x and y on a JointGrid.
sns.jointplot(data=df, x="x", y="y")
Out[35]:
<seaborn.axisgrid.JointGrid at 0x29eea1f6680>
In [36]:
# Pairwise relationship plot for the "iris" example dataset.
df = sns.load_dataset("iris")
sns.pairplot(data=df)
Out[36]:
<seaborn.axisgrid.PairGrid at 0x29eea985630>
In [37]:
# Same pair plot, coloured by species.
sns.pairplot(data=df, hue="species")
Out[37]:
<seaborn.axisgrid.PairGrid at 0x29eea170eb0>
In [38]:
# Reload the "tips" dataset for the regression plot.
df = sns.load_dataset("tips")
In [39]:
# Regression plot of tip against total bill.
sns.regplot(data=df, x="total_bill", y="tip")
Out[39]:
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
In [75]:
# "flights" example dataset in long form: one row per (year, month) pair.
df = sns.load_dataset("flights")
df
Out[75]:
year month passengers
0 1949 Jan 112
1 1949 Feb 118
2 1949 Mar 132
3 1949 Apr 129
4 1949 May 121
... ... ... ...
139 1960 Aug 606
140 1960 Sep 508
141 1960 Oct 461
142 1960 Nov 390
143 1960 Dec 432

144 rows × 3 columns

In [41]:
# Reshape the flights data to a month x year table of passenger counts.
# The pivot starts from a freshly loaded frame so the cell is idempotent:
# re-running `df = df.pivot(...)` on the already pivoted table would fail
# because the 'month'/'year'/'passengers' columns no longer exist.
df = sns.load_dataset('flights').pivot(index='month', columns='year', values='passengers')
In [42]:
# Heat map of the month x year passenger table.
sns.heatmap(data=df)
Out[42]:
<AxesSubplot:xlabel='year', ylabel='month'>
In [43]:
# Same heat map with each cell annotated by its integer value.
sns.heatmap(df, fmt="d", annot=True)
Out[43]:
<AxesSubplot:xlabel='year', ylabel='month'>
In [44]:
# Load the "fmri" example dataset again and display it.
df = sns.load_dataset("fmri")
df
Out[44]:
subject timepoint event region signal
0 s13 18 stim parietal -0.017552
1 s5 14 stim parietal -0.080883
2 s12 18 stim parietal -0.081033
3 s11 18 stim parietal -0.046134
4 s10 18 stim parietal -0.037970
... ... ... ... ... ...
1059 s0 8 cue frontal 0.018165
1060 s13 7 cue frontal -0.029130
1061 s12 7 cue frontal -0.004939
1062 s11 7 cue frontal -0.025367
1063 s0 0 cue parietal -0.006899

1064 rows × 5 columns

In [45]:
# Recap section: start again from the "tips" dataset.
df = sns.load_dataset("tips")
df.head()
Out[45]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
In [46]:
# Tip against total bill, coloured by the customer's sex.
sns.relplot(
    data=df,
    x="total_bill",
    y="tip",
    hue="sex",
)
Out[46]:
<seaborn.axisgrid.FacetGrid at 0x29eef2ae890>
In [47]:
# Box plot of the tip for each value of `time`.
sns.catplot(data=df, x="time", y="tip", kind="box")
Out[47]:
<seaborn.axisgrid.FacetGrid at 0x29eef293460>
In [48]:
# Joint plot of total bill and tip.
sns.jointplot(data=df, x="total_bill", y="tip")
Out[48]:
<seaborn.axisgrid.JointGrid at 0x29ef0607370>
In [49]:
# Pair plot of the tips data, coloured by `time`.
sns.pairplot(data=df, hue="time")
Out[49]:
<seaborn.axisgrid.PairGrid at 0x29ef07ca8f0>
In [50]:
# Regression plot of tip against total bill.
sns.regplot(data=df, x="total_bill", y="tip")
Out[50]:
<AxesSubplot:xlabel='total_bill', ylabel='tip'>
In [51]:
# Pie chart of how many rows fall into each value of `time`.
df["time"].value_counts().plot.pie()
Out[51]:
<AxesSubplot:ylabel='time'>
In [52]:
# Histogram of the tip amounts via the pandas plotting accessor.
df["tip"].plot.hist()
Out[52]:
<AxesSubplot:ylabel='Frequency'>
In [53]:
import plotly.express as px
In [54]:
# Iris data as shipped with Plotly Express (the frame shown below includes a species_id column).
data = px.data.iris()
In [55]:
# Display the iris frame.
data
Out[55]:
sepal_length sepal_width petal_length petal_width species species_id
0 5.1 3.5 1.4 0.2 setosa 1
1 4.9 3.0 1.4 0.2 setosa 1
2 4.7 3.2 1.3 0.2 setosa 1
3 4.6 3.1 1.5 0.2 setosa 1
4 5.0 3.6 1.4 0.2 setosa 1
... ... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 virginica 3
146 6.3 2.5 5.0 1.9 virginica 3
147 6.5 3.0 5.2 2.0 virginica 3
148 6.2 3.4 5.4 2.3 virginica 3
149 5.9 3.0 5.1 1.8 virginica 3

150 rows × 6 columns

In [56]:
# Plotly scatter of sepal width against sepal length, coloured by species.
px.scatter(
    data,
    x="sepal_length",
    y="sepal_width",
    color="species",
    width=500,
    height=400,
)
In [57]:
# Same scatter with an OLS trendline requested via `trendline`.
px.scatter(
    data,
    x="sepal_length",
    y="sepal_width",
    color="species",
    trendline="ols",
    width=500,
    height=400,
)
In [58]:
# Plotly histogram of the sepal length.
px.histogram(data, x="sepal_length")
In [59]:
# Fresh copy of the "tips" dataset for the binning example; display it.
df = sns.load_dataset("tips")
df
Out[59]:
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 29.03 5.92 Male No Sat Dinner 3
240 27.18 2.00 Female Yes Sat Dinner 2
241 22.67 2.00 Male Yes Sat Dinner 2
242 17.82 1.75 Male No Sat Dinner 2
243 18.78 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [60]:
# Bin total_bill into two classes labelled 'malo' (low) and 'duzo' (high).
# The frame is reloaded first so the cell is idempotent: the assignment
# replaces the numeric column with labels, so cutting it a second time would
# fail on a re-run.
df = sns.load_dataset('tips')
df['total_bill'] = pd.cut(x=df['total_bill'], bins=2, labels=['malo', 'duzo'])
In [63]:
# Display the frame; total_bill now holds the 'malo' / 'duzo' labels.
df
Out[63]:
total_bill tip sex smoker day time size
0 malo 1.01 Female No Sun Dinner 2
1 malo 1.66 Male No Sun Dinner 3
2 malo 3.50 Male No Sun Dinner 3
3 malo 3.31 Male No Sun Dinner 2
4 malo 3.61 Female No Sun Dinner 4
... ... ... ... ... ... ... ...
239 duzo 5.92 Male No Sat Dinner 3
240 duzo 2.00 Female Yes Sat Dinner 2
241 malo 2.00 Male Yes Sat Dinner 2
242 malo 1.75 Male No Sat Dinner 2
243 malo 3.00 Female No Thur Dinner 2

244 rows × 7 columns

In [65]:
# Box plot of the tip for each total_bill class.
sns.catplot(
    data=df,
    x="total_bill",
    y="tip",
    kind="box",
)
Out[65]:
<seaborn.axisgrid.FacetGrid at 0x29ef3b60b20>
In [82]:
# Reload the untouched "tips" dataset (total_bill is numeric again).
df = sns.load_dataset("tips")
In [83]:
# Mean tip for every (day, time) combination, as a flat table.
# `observed=False` is passed explicitly: both keys are categorical, and the
# combinations that never occur in the data (the NaN rows, filled in later)
# only appear when unobserved categories are kept. Relying on the implicit
# default is deprecated in recent pandas releases.
baza = (
    df.groupby(['day', 'time'], observed=False)['tip']
    .mean()
    .reset_index()
)
baza
Out[83]:
day time tip
0 Thur Lunch 2.767705
1 Thur Dinner 3.000000
2 Fri Lunch 2.382857
3 Fri Dinner 2.940000
4 Sat Lunch NaN
5 Sat Dinner 2.993103
6 Sun Lunch NaN
7 Sun Dinner 3.255132
In [84]:
# Replace the missing mean tips with the mean of the remaining values.
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy="mean", missing_values=np.nan)
tip_column = baza[["tip"]]  # 2-D selection, as passed to fit_transform
baza["tip"] = imputer.fit_transform(tip_column)
In [87]:
# Heat map of the mean tip with days as rows and meal times as columns.
# `pivot` arguments are passed by keyword: pandas 2.0 made them keyword-only,
# so the positional form raises a TypeError there.
sns.heatmap(baza.pivot(index='day', columns='time', values='tip'))
Out[87]:
<AxesSubplot:xlabel='time', ylabel='day'>